/*------------------------------------------------------------------------------ 

			Link the B8 students to their B4 data and SNED

------------------------------------------------------------------------------*/

* Project root. Replace the placeholder with the folder that contains
* "prep replica" before running this do-file.
global replica_dir "<add path to your working directory>/prep replica"

* Sub-folders used throughout the script, all derived from the root above.
global data_dir   "$replica_dir/data"
global output_dir "$replica_dir/output"
global code_dir   "$replica_dir/code"

/*----------------------------------------------------------------------------*/

**# Clean the raw data files

/*----------------------------------------------------------------------------*/


* Execute each cleanup do-file from $code_dir, in this fixed order:
* the three grade-8 (b8) files, the three grade-4 (b4) files, then SNED.
* Presumably these write the .dta files loaded further below -- confirm
* against the cleanup scripts themselves.
foreach src in simce_b8_2015 simce_b8_2013 simce_b8_2011 ///
               simce_b4_2011 simce_b4_2009 simce_b4_2007 ///
               snedforsimce {
	run "$code_dir/cleanup_`src'.do"
}
 

/*----------------------------------------------------------------------------*/

**# Link SIMCE 8B 2015 + 4B 2011 for SNED 2014

/*----------------------------------------------------------------------------*/

* Start from the 2011 4B file and retain only students (mrun) that also
* appear in the 2015 8B file; no _merge variable is left behind.
use "$data_dir/simce_b4_2011.dta", clear
merge 1:1 mrun using "$data_dir/simce_b8_2015.dta", keep(match) nogenerate

* rbd_pre becomes rbd, which together with sned_yr is the key used for the
* SNED merge later in this file. This cohort is tagged with SNED year 2014.
rename rbd_pre rbd
generate sned_yr = 2014

save "$data_dir/simce4b8_2011_2015.dta", replace


/*----------------------------------------------------------------------------*/

**# Link SIMCE 8B 2013 + 4B 2009 for SNED 2012

/*----------------------------------------------------------------------------*/

* Start from the 2009 4B file and retain only students (mrun) that also
* appear in the 2013 8B file; no _merge variable is left behind.
use "$data_dir/simce_b4_2009.dta", clear
merge 1:1 mrun using "$data_dir/simce_b8_2013.dta", keep(match) nogenerate

* rbd_pre becomes rbd, which together with sned_yr is the key used for the
* SNED merge later in this file. This cohort is tagged with SNED year 2012.
rename rbd_pre rbd
generate sned_yr = 2012

save "$data_dir/simce4b8_2009_2013.dta", replace

 
/*----------------------------------------------------------------------------*/

**# Link SIMCE 8B 2011 + 4B 2007 for SNED 2010

/*----------------------------------------------------------------------------*/
 
* Start from the 2007 4B file and retain only students (mrun) that also
* appear in the 2011 8B file; no _merge variable is left behind.
use "$data_dir/simce_b4_2007.dta", clear
merge 1:1 mrun using "$data_dir/simce_b8_2011.dta", keep(match) nogenerate

* rbd_pre becomes rbd, which together with sned_yr is the key used for the
* SNED merge later in this file. This cohort is tagged with SNED year 2010.
rename rbd_pre rbd
generate sned_yr = 2010

save "$data_dir/simce4b8_2007_2011.dta", replace


*------------------------------------------------------------------------------* 

**# Stack up all cohorts and link to SNED panel

*------------------------------------------------------------------------------*

* Stack the three linked cohorts (2011-2015 first, then 2009-2013, then
* 2007-2011) into a single student-level file.
use "$data_dir/simce4b8_2011_2015.dta", clear
append using "$data_dir/simce4b8_2009_2013.dta" "$data_dir/simce4b8_2007_2011.dta"

* Attach the SNED record for each school (rbd) and SNED year; students whose
* school-year has no match in stacked_sned.dta are dropped.
merge m:1 rbd sned_yr using "$data_dir/stacked_sned.dta", keep(match) nogenerate

save "$data_dir/panel_simce4b8_sned_studlvl.dta", replace


*------------------------------------------------------------------------------*

**# Derived variables 

*------------------------------------------------------------------------------*

* 0/1 flags equal to 1 when the school dependency code (sch_depe1_*) is 1 or 2.
* A missing code yields 0, not missing.
* NOTE(review): the variable names suggest codes 1 and 2 denote municipal
* schools -- confirm against the SIMCE codebook.
generate sch_municip_pre  = inlist(sch_depe1_pre,  1, 2)
generate sch_municip_post = inlist(sch_depe1_post, 1, 2)


***

* One indicator variable per observed category, named <variable>_1, _2, ...
* for the SNED year, the three pre-period family variables and sch_gse_pre.
foreach v in sned_yr pad_inc_pre mom_edu_pre dad_edu_pre sch_gse_pre {
	tabulate `v', generate(`v'_)
}

* Same for hg, explicitly restricted to observations where hg is not missing.
tabulate hg if !missing(hg), generate(hg_)

***

* Compare rbd with rbd_post. Both flags are 0 (not missing) whenever either
* school code equals the system missing value, so such students count as
* neither leavers nor stayers.
generate leave_sch = !(rbd == rbd_post | rbd_post == . | rbd == .)
generate stay_sch  = (rbd == rbd_post) & (rbd_post != .) & (rbd != .)


***

* treat25: 1 if sel==1, missing if sel==3, 0 for every other value of sel.
* treat10: 1 if sel==2, missing if sel==1, 0 for every other value of sel.
generate treat25 = cond(sel == 3, ., sel == 1)
generate treat10 = cond(sel == 1, ., sel == 2)

* Squared st_ind terms (25 first, then 10, to keep the variable order).
foreach k in 25 10 {
	generate st_ind`k'_sq = st_ind`k' * st_ind`k'
}

* Treatment x st_ind interactions.
foreach k in 25 10 {
	generate inter`k' = treat`k' * st_ind`k'
}

* Treatment x squared st_ind interactions.
foreach k in 25 10 {
	generate inter`k'_sq = treat`k' * st_ind`k'_sq
}


***

* Restrict the sample to establishment types A-D BEFORE building the numeric
* coding. encode numbers the string values present in the data in
* alphabetical order, so doing it first would let categories that are dropped
* anyway shift the codes and create indicator variables that are all zero in
* the final sample.
keep if inlist(tipo_est, "A", "B", "C", "D")

* With A-D all present, encode yields A=1, B=2, C=3, D=4; the recode then
* merges codes 2 and 3 into 2 and moves code 4 to 3.
* NOTE(review): the value label created by encode stays attached after the
* recode, so codes 2 and 3 still display as "B" and "C" -- relabel if the
* displayed text matters.
encode 	tipo_est, gen(tipo_est_num)
recode 	tipo_est_num (1 = 1) (2 3 = 2) (4 = 3)
tab 	tipo_est_num, gen(tipo_est_num_)


* Variable labels for the grade-8 outcomes.
label variable mate_post "Grade 8 Math Score"
label variable lect_post "Grade 8 Reading Score"
label variable nat_post  "Grade 8 Natural Sciences"

* Variable labels for the two treatment indicators.
label variable treat25 "Full vs. Partial Bonus"
label variable treat10 "Partial vs. No Bonus"

* Overwrite the student-level panel, now including the derived variables.
save "$data_dir/panel_simce4b8_sned_studlvl.dta", replace




*** 

* Reduce the panel to one row per (hg, sned_yr), keeping the first observed
* value of each listed variable within the group.
collapse (first) cut25 cut10 st_ind25 st_ind10 sch_municip_pre , by(hg sned_yr)

* For each cutoff (25, 10) and each SNED year, store the mean and median of
* cut`i' across the collapsed rows with non-missing st_ind`i':
*   <stat>_cut`i'     all rows
*   <stat>_cut`i'_pv  rows with sch_municip_pre==0
*   <stat>_cut`i'_mv  rows with sch_municip_pre==1
* The median_* variables were previously created but never filled; they are
* now taken from r(p50), which summarize stores when detail is specified.
foreach i in 25 10 {
	foreach stat in mean median {
		gen `stat'_cut`i'    = .
		gen `stat'_cut`i'_pv = .
		gen `stat'_cut`i'_mv = .
	}

	foreach year in 2010 2012 2014 {
		sum     cut`i'                     if st_ind`i'!=. & sned_yr==`year', detail
		replace mean_cut`i'      = r(mean) if st_ind`i'!=. & sned_yr==`year'
		replace median_cut`i'    = r(p50)  if st_ind`i'!=. & sned_yr==`year'

		sum     cut`i'                     if st_ind`i'!=. & sch_municip_pre==0 & sned_yr==`year', detail
		replace mean_cut`i'_pv   = r(mean) if st_ind`i'!=. & sch_municip_pre==0 & sned_yr==`year'
		replace median_cut`i'_pv = r(p50)  if st_ind`i'!=. & sch_municip_pre==0 & sned_yr==`year'

		sum     cut`i'                     if st_ind`i'!=. & sch_municip_pre==1 & sned_yr==`year', detail
		replace mean_cut`i'_mv   = r(mean) if st_ind`i'!=. & sch_municip_pre==1 & sned_yr==`year'
		replace median_cut`i'_mv = r(p50)  if st_ind`i'!=. & sch_municip_pre==1 & sned_yr==`year'
	}
}

* Keep only the summaries plus the merge keys, then attach them to every
* student row of the saved panel.
keep mean_* median_* hg sned_yr

merge 1:m hg sned_yr using "$data_dir/panel_simce4b8_sned_studlvl.dta"
drop _merge

save "$data_dir/panel_simce4b8_sned_studlvl.dta", replace


*------------------------------------------------------------------------------*

**# Erase redundant data: 

*------------------------------------------------------------------------------*


* Work inside the data folder (this changes Stata's working directory for
* the rest of the session).
cd "$data_dir"

* Delete every .dta file whose name starts with simce, sned or temp, in that
* order. The final panel file starts with "panel_" and is therefore kept.
foreach stub in simce sned temp {
	local doomed : dir . files "`stub'*.dta"
	foreach f of local doomed {
		erase "`f'"
	}
}

* The stacked SNED file does not match any pattern above; remove it by name.
erase stacked_sned.dta


*------------------- END



